# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
%matplotlib inline
import os
import numpy as np
import torch
This project uses the FastAI v0.7 library. To install the correct version, please follow the instructions in the README file.
from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *
Here are some of the project variables we use for convenience
version = 10  # experiment version; namespaces the tmp/model dirs and the submission filename
PATH = "./clean-data/"  # root of the cleaned Kaggle cats-vs-dogs data
TMP_PATH = f"/tmp/tmp/resnet34_{version}"  # scratch dir for fastai's precomputed activations
MODEL_PATH = f"/tmp/model/resnet34_{version}"  # where learner checkpoints are saved
sz=224  # side length images are resized to before entering the network
arch=resnet34  # pretrained backbone architecture (from the fastai star imports)
In the next 2 cells, we check that all CUDA-related resources have been set up correctly.
torch.cuda.is_available()  # expect True: a CUDA-capable GPU is visible to torch
torch.backends.cudnn.enabled  # expect True: cuDNN acceleration is enabled
Here we show the subdirectories of this project. If you do not have the images, remember to download them from Kaggle first. Check out the README file.
os.listdir(PATH)  # sanity-check the contents of the data directory
Loop through the train directory and create the respective filenames and labels. These 2 numpy arrays are used in subsequent model initialization.
# Filenames relative to PATH, and labels encoded as 0=cat, 1=dog
# (any filename without 'cat' in it is treated as a dog).
fnames = np.array([f'train/{f}' for f in sorted(os.listdir(f'{PATH}train'))])
labels = np.array([(0 if 'cat' in fname else 1) for fname in fnames])
We create a data object and pass it to the pretrained method, which uses a pretrained resnet34 model. We can also see the results after training for just 2 epochs.
# Start from a clean precompute cache, then fit the pretrained resnet34's
# head for 2 epochs at lr=1e-3.
shutil.rmtree(f'{PATH}tmp', ignore_errors=True)
data = ImageClassifierData.from_names_and_array(
    path=PATH,
    fnames=fnames,
    y=labels,
    # Fix: labels are encoded 0=cat, 1=dog (see the labels array above),
    # so the class-name list must be ['cats', 'dogs']; the original
    # ['dogs', 'cats'] swapped the displayed class names.
    classes=['cats', 'dogs'],
    test_name='test',
    tfms=tfms_from_model(arch, sz)
)
learn = ConvLearner.pretrained(arch, data, precompute=True, tmp_name=TMP_PATH, models_name=MODEL_PATH)
learn.fit(1e-3, 2)
In the next few cells, we output the different attributes of the data object. This helps us get a good grasp of what the data and learn objects look like.
data.val_y  # validation labels (0=cat, 1=dog per the label encoding above)
data.classes
log_preds = learn.predict()  # per-class log-probabilities on the validation set
log_preds.shape
log_preds[:10]
preds = np.argmax(log_preds, axis=1) # from log probabilities to 0 or 1
probs = np.exp(log_preds[:,1]) # pr(dog)
probs
def rand_by_mask(mask, n=20):
    """Return up to `n` random indices (without replacement) where `mask` is True.

    Fix: the original always requested exactly 20 samples, which raises
    ValueError from np.random.choice when fewer than 20 entries match the
    mask. We clamp to the number of available indices instead.
    """
    idxs = np.where(mask)[0]
    return np.random.choice(idxs, min(len(idxs), n), replace=False)
def rand_by_correct(is_correct):
    """Random validation indices whose prediction correctness equals `is_correct`."""
    hit = (preds == data.val_y)
    return rand_by_mask(hit == is_correct)
def plots(ims, figsize=(12,6), rows=1, titles=None):
    """Display the images in `ims` on a grid with `rows` rows.

    Optionally sets a per-image title from `titles` (same length as `ims`).
    """
    fig = plt.figure(figsize=figsize)
    cols = len(ims) // rows
    for idx, im in enumerate(ims):
        ax = fig.add_subplot(rows, cols, idx + 1)
        ax.axis('Off')
        if titles is not None:
            ax.set_title(titles[idx], fontsize=16)
        plt.imshow(im)
The following functions give us a good idea of how wrong an image is, and also print out the image and the filename so we can check that the image is not mislabeled.
While changing parameters and experimenting with different techniques, I found quite a few mislabeled images, and I have removed or renamed them.
def load_img_id(ds, idx):
    """Open image number `idx` of dataset `ds` from disk as a numpy array."""
    img = PIL.Image.open(PATH + ds.fnames[idx])
    return np.array(img)
def plot_val_with_title(idxs, title):
    """Plot the validation images at `idxs` in a grid, each captioned with
    its pr(dog) and filename, after printing `title`."""
    images = [load_img_id(data.val_ds, i) for i in idxs]
    captions = [str(probs[i]) + "\r\n[" + data.val_ds.fnames[i] + "]" for i in idxs]
    print(title)
    return plots(images, rows=5, titles=captions, figsize=(16,10))
plot_val_with_title(rand_by_correct(True), "Correctly classified")  # random sample of hits
plot_val_with_title(rand_by_correct(False), "Incorrectly classified")  # random sample of misses
def most_by_mask(mask, mult):
    """Up to 20 indices from `mask` with the most extreme pr(dog).

    `mult` of -1 sorts descending (highest probability first), +1 ascending.
    """
    candidates = np.where(mask)[0]
    order = np.argsort(mult * probs[candidates])
    return candidates[order[:20]]
def most_by_correct(y, is_correct):
    """Most confidently correct (or incorrect) validation examples of class `y`."""
    direction = -1 if (y == 1) == is_correct else 1
    hit = (preds == data.val_y) == is_correct
    return most_by_mask(hit & (data.val_y == y), direction)
# "Most correct" = highest-confidence right answers; "most incorrect" =
# highest-confidence wrong answers, per class.
plot_val_with_title(most_by_correct(0, True), "Most correct cats")
plot_val_with_title(most_by_correct(1, True), "Most correct dogs")
plot_val_with_title(most_by_correct(0, False), "Most incorrect cats")
plot_val_with_title(most_by_correct(1, False), "Most incorrect dogs")
most_uncertain = np.argsort(np.abs(probs -0.5))[:20]  # probabilities closest to 0.5
plot_val_with_title(most_uncertain, "Most uncertain predictions")
# Re-create the learner from scratch so the LR finder starts from fresh
# pretrained weights rather than the already-fitted ones.
learn = ConvLearner.pretrained(arch, data, precompute=True, tmp_name=TMP_PATH, models_name=MODEL_PATH)
lrf=learn.lr_find()  # sweep learning rates, recording the loss at each
learn.sched.plot_lr()  # learning rate vs. iteration during the sweep
learn.sched.plot()  # loss vs. learning rate — used to pick a good rate
To use data augmentation, we first need to define what kinds of transformations we want to apply. I decided on these few because I found that, in the first few iterations, the model wasn't very certain about cats and dogs whose images were rotated or had different types of lighting. So here, I chose to add those transformations.
# Augmentation pipeline: fastai's side-on flips plus extra rotation,
# lighting jitter and zoom.
transforms = transforms_side_on + [
RandomRotate(180, p=0.75, mode=cv2.BORDER_REFLECT, tfm_y=TfmType.NO),  # rotate up to 180 degrees, 75% of the time
RandomLighting(b=0.5, c=0, tfm_y=TfmType.NO),  # brightness jitter only (c=0 leaves contrast alone)
RandomZoom(zoom_max=1)
]
tfms = tfms_from_model(arch, sz, aug_tfms=transforms, max_zoom=1.1)
def get_augs():
    """Build a throwaway data object and return one augmented training image.

    Uses a tiny batch (bs=2, single worker) purely so we can pull a sample
    from the augmentation dataloader for visualization.
    """
    data = ImageClassifierData.from_names_and_array(
        path=PATH,
        fnames=fnames,
        y=labels,
        # Fix: labels are 0=cat, 1=dog, so the class names must be
        # ['cats', 'dogs'] — the original ['dogs', 'cats'] swapped them.
        classes=['cats', 'dogs'],
        test_name='test',
        tfms=tfms,
        num_workers=1,
        bs=2
    )
    x, _ = next(iter(data.aug_dl))
    return data.trn_ds.denorm(x)[1]
ims = np.stack([get_augs() for i in range(9)])  # nine independently augmented samples
A quick visualization of how the images look after transformation.
plots(ims, rows=3)  # 3x3 grid of the augmented images
We add the transformation as a parameter to the data object creation.
# Rebuild the data object with the augmentation transforms and retrain
# the head for one epoch.
data = ImageClassifierData.from_names_and_array(
    path=PATH,
    fnames=fnames,
    y=labels,
    # Fix: labels are 0=cat, 1=dog, so the class names must be
    # ['cats', 'dogs'] — the original ['dogs', 'cats'] swapped them.
    classes=['cats', 'dogs'],
    test_name='test',
    tfms=tfms
)
learn = ConvLearner.pretrained(arch, data, precompute=True, tmp_name=TMP_PATH, models_name=MODEL_PATH)
learn.fit(1e-3, 1)
# Turn precompute off so augmented images actually flow through the
# network (precomputed activations would ignore the augmentations).
learn.precompute=False
We then proceed to train the model's last layer while keeping all the other layers untouched. Here we can see that there is only so much improvement we can gain from doing so.
learn.fit(1e-3, 20, cycle_len=2)  # 20 SGDR cycles, 2 epochs each — still last layer only
To properly visualize learning rate annealing, we can see below a spiky graph. Each spike is where a restart happens for SGD with restarts.
learn.sched.plot_lr()  # annealed LR schedule; the spikes mark SGDR restarts
Next, we want to now retrain the other layers, so we save and load.
learn.save('224_lastlayer')  # checkpoint the head-only training before unfreezing
learn.load('224_lastlayer')
We then unfreeze the earlier layers.
learn.unfreeze()  # make all layer groups trainable, not just the head
Here, we set the different learning rates. Since the first few layers identify very basic edges, we give them a much lower learning rate such as 1e-4; the middle layers we train with a learning rate of 1e-3; finally, we train the last few layers with a learning rate of 1e-2.
This is the part where we apply differential learning rate annealing. The differential part happens when we pass a cycle_mult of 2. We can also visualize this later in the graph: we can see that each peak and trough gets wider with the increasing number of epochs.
# Differential learning rates: earliest layer group 1e-4, middle 1e-3, head 1e-2.
lr=np.array([1e-4,1e-3,1e-2])
learn.fit(lr, 6, cycle_len=1, cycle_mult=2)  # SGDR; cycle_mult=2 doubles each successive cycle's length
Visualizing differential learning rate.
learn.sched.plot_lr()  # visualize the lengthening SGDR cycles
learn.save('224_all')  # checkpoint the fully fine-tuned model
learn.load('224_all')
# Test-time augmentation on the validation set: average the predicted
# probabilities over several augmented versions of each image.
log_preds,y = learn.TTA()
probs = np.mean(np.exp(log_preds),0)
accuracy_np(probs, y)
preds = np.argmax(probs, axis=1)
probs = probs[:,1]  # keep only pr(dog)
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y, preds)
plot_confusion_matrix(cm, data.classes)
plot_val_with_title(most_by_correct(0, False), "Most incorrect cats")
plot_val_with_title(most_by_correct(1, False), "Most incorrect dogs")
Using the TTA function with is_test set to True, we now apply Test Time Augmentation to the test set. During this step, we take the mean of the results and calculate the probability. Finally, we take the probability that an image is a dog.
# TTA on the *test* set this time; average the augmented predictions and
# keep the probability of class 1 (dog) for the submission.
log_preds, y = learn.TTA(is_test=True)
probs = np.mean(np.exp(log_preds),0)
probs_dogs = probs[:,1]
In the next couple of cells, we create the required submission and check that the format is correct.
import re
# Extract the numeric id from each test filename (e.g. 'test/1234.jpg' -> 1234).
# Fix: cast to int so the sort below is numeric; the original kept strings,
# which sort lexicographically ('10' < '2'). The CSV output is unchanged.
ids = [int(re.search(r'\d+', file).group()) for file in data.test_ds.fnames]
submission = pd.DataFrame({'id': ids, 'label': probs_dogs})
submission = submission.sort_values(by=['id'])
submission.head()
submission.to_csv(f'submission-{version}.csv', index=False)
Some of the removed images